################################################################################
## This script generates the simulations appearing in Section 6.
## Results are printed to standard out.

## USAGE: Rscript example_simulations.R
################################################################################

library(sandwich)

################################################################################
## functions to generate simulated data
################################################################################
generateDataExample3 <- function() {
  set.seed(7)
  p=1000 # number of precincts, in practice should be observed
  nn=150  
  n=rep(nn,p) #precinct sizes, in practice should be observed
  x=runif(p,0,1) #proportions of blacks, in practice should be observed
  ww0=1 #white vote prob in pure white precinct, unavailable in practice
  ww1=0 #white vote prob in pure black precinct, unavailable in practice
  bb0=0  #black vote prob in pure white precinct, unavailable in practice
  bb1=0  #black vote prob in pure black precinct, unavailable in practice
  w0=log(ww0/(1-ww0))
   #in practice w0 should be estimated by regressing t<-w0+c1x+d1x^2
  w1=log(ww1/(1-ww1))-log(ww0/(1-ww0))
  b0=log(bb0/(1-bb0))
  b1=log(bb1/(1-bb1))-log(bb0/(1-bb0))
  b=rep(0.5,p)
  w=b
  s=0  #extra noise sd
  for(i in 1:p) {
    b[i]=0
    w[i]= 1-x[i] 
  }
  t=(w*round(n*(1-x))+b*round(n*x))/n #comb'd vote prop, in practice is observed
  bd=sum(round(n*x)*b)/sum(round(n*x)) #true district b
  
  generatedData <- list()
  generatedData[["x"]] <- x
  generatedData[["t"]] <- t
  generatedData[["n"]] <- n
  generatedData[["trueBetaB"]] <- bd
  return(generatedData)
}

generateDataExample4 <- function() {
  set.seed(7)
  p=1000 # number of precincts, in practice should be observed
  nn=150 
  n=rep(nn,p) #precinct sizes, in practice should be observed
  x=runif(p,0,0.95) #proportions of blacks, in practice should be observed
  l=min(x)  
  u=max(x) 
  ww0=0.9 #white vote prob in pure white precinct, unavailable in practice
  ww1=0.9 #white vote prob in pure black precinct, unavailable in practice
  bb0=0.9 #black vote prob in pure white precinct, unavailable in practice
  bb1=0.6 #black vote prob in pure black precinct, unavailable in practice
  w0=log(ww0/(1-ww0))
  #in practice w0 should be estimated by regressing t<-w0+c1x+d1x^2
  w1=log(ww1/(1-ww1))-log(ww0/(1-ww0))
  b0=log(bb0/(1-bb0))
  b1=log(bb1/(1-bb1))-log(bb0/(1-bb0))
  b=rep(0.5,p)
  w=b
  s=0.5 #extra noise sd
  for(i in 1:p) {
    b[i]=rbinom(1,round(n[i]*x[i])+1,1/(1+exp(-b0-b1*x[i]+s*(1-x[i])*rnorm(1))))/(round(n[i]*x[i])+1)
    w[i]=rbinom(1,round(n[i]*(1-x[i]))+1,1/(1+exp(-w0-w1*x[i]+s*(1-x[i])*rnorm(1))))/(round(n[i]*(1-x[i]))+1)
  }
  t=(w*round(n*(1-x))+b*round(n*x))/n #comb'd vote prop, in practice is observed
  bd=sum(round(n*x)*b)/sum(round(n*x)) #true district b
  generatedData <- list()
  generatedData[["x"]] <- x
  generatedData[["t"]] <- t
  generatedData[["n"]] <- n
  generatedData[["trueBetaB"]] <- bd
  return(generatedData)
}

generateDataExample5 <- function() {
  set.seed(7)
  p=1000 # number of precincts, in practice should be observed
  nn=150 
  n=rep(nn,p) #precinct sizes, in practice should be observed
  x=runif(p,0,0.7) #proportions of blacks, in practice should be observed
  l=min(x) 
  u=max(x) 
  ww0=0.9 #white vote prob in pure white precinct, unavailable in practice
  ww1=0.9 #white vote prob in pure black precinct, unavailable in practice
  bb0=0.5 #black vote prob in pure white precinct, unavailable in practice
  bb1=0.5 #black vote prob in pure black precinct, unavailable in practice
  w0=log(ww0/(1-ww0))
  #in practice w0 should be estimated by regressing t<-w0+c1x+d1x^2
  w1=log(ww1/(1-ww1))-log(ww0/(1-ww0))
  b0=log(bb0/(1-bb0))
  b1=log(bb1/(1-bb1))-log(bb0/(1-bb0))
  b=rep(0.5,p)
  w=b
  s=1 #extra noise sd
  for(i in 1:p) {
    b[i]=rbinom(1,round(n[i]*x[i])+1,1/(1+exp(-b0-b1*x[i]+s*rnorm(1))))/(round(n[i]*x[i])+1)
    w[i]=rbinom(1,round(n[i]*(1-x[i]))+1,1/(1+exp(-w0-w1*x[i]+s*rnorm(1))))/(round(n[i]*(1-x[i]))+1)
  }
  t=(w*round(n*(1-x))+b*round(n*x))/n #comb'd vote prop, in practice is observed
  bd=sum(round(n*x)*b)/sum(round(n*x)) #true district b
  generatedData <- list()
  generatedData[["x"]] <- x
  generatedData[["t"]] <- t
  generatedData[["n"]] <- n
  generatedData[["trueBetaB"]] <- bd
  return(generatedData)
}


################################################################################
## Calculate and evaluate the proposed bounds
################################################################################

calculateAndEvaluateBounds <- function(x, t, n, trueBetaB) {
  
  l=min(x) 
  u=max(x) 
  
  r=sum(n*x*(1-x))/sum(n*x)
  lb=1
  #this is our choice of lambda
  h0=lb*sum(n*x*t)/sum(n*x)
  s1= sqrt(sum((n*x*((1+lb)/2-lb*x))^2)/(sum(n*x))^2)
  cl=0.90
  #this is conf level
  alp=1-cl
  #ex=qnorm(1-alp/4)
  ex=1
  gl01=0
  gl1=c(-1/l,0,0)
  gl02=-1/(1-l)
  gl2=c(1/(1-l),1/(1-l),1/(1-l)-1)
  gu01=1/l
  gu1=c(-1/l,0,0)
  gu02=0
  gu2=c(1/(1-l),1/(1-l),1/(1-l)-1)
  ###
  gl03=0
  gl3=c(-1/u,0,0)
  gl04=-1/(1-u)
  gl4=c(1/(1-u),1/(1-u),1/(1-u)-1)
  gu03=1/u
  gu3=c(-1/u,0,0)
  gu04=0
  gu4=c(1/(1-u),1/(1-u),1/(1-u)-1)
  h=c(1-lb, sum(n*x*(1-lb*x))/sum(n*x),sum(n*x^2*(1-lb*x))/sum(n*x))
  ###### 
  bu= t/x
  bu=bu*1*(bu<1)+1*(bu>1) #duncan-davis upperbound at precinct level
  bl= (t-(1-x))/x
  bl= 0*1*(bl<0)+bl*1*(bl>0)#duncan-davis lowerbound at precinct level
  #in practice w0 should be estimated by regressing t<-w0+c1x+d1x^2
  # d1=b1-w1 #in practice d1 should be estimated by regressing t<-w0+c1x+d1x^2
  # c1=b0-w0+w1  #in practice c1 should be estimated by regressing t<-w0+c1x+d1x^2
  reg=lm(formula = t ~ poly(x, 2,raw=TRUE))#do quadratic regression here
  w0=coef(reg)[1]
  c1=coef(reg)[2]
  d1=coef(reg)[3]
  th=coef(reg)
  #library(sandwich)
  v=vcovHC(reg,"HC1")
  #this gives sandwich variance of STATA see 
  #https://stats.stackexchange.com/questions/117052/replicating-statas-robust-option-in-r
  #information re robust sandwich variance formula:
  #https://www.stata.com/manuals/p_robust.pdf
  #
  sl1=s1+sqrt( t(h-r*gu1)%*%v%*% (h-r*gu1))
  sl2=s1+sqrt( t(h-r*gu2)%*%v%*% (h-r*gu2))
  sl3=s1+sqrt( t(h-r*gu3)%*%v%*% (h-r*gu3))
  sl4=s1+sqrt( t(h-r*gu4)%*%v%*% (h-r*gu4))
  sl=c(sl1,sl2,sl3,sl4)
  su1=s1+sqrt( t(h-r*gl1)%*%v%*% (h-r*gl1))
  su2=s1+sqrt( t(h-r*gl2)%*%v%*% (h-r*gl2))
  su3=s1+sqrt( t(h-r*gl3)%*%v%*% (h-r*gl3))
  su4=s1+sqrt( t(h-r*gl4)%*%v%*% (h-r*gl4))
  su=c(su1,su2,su3,su4)
  bl1=h0-r*gu01+t(h-r*gu1)%*%th
  bl2=h0-r*gu02+t(h-r*gu2)%*%th
  bl3=h0-r*gu03+t(h-r*gu3)%*%th
  bl4=h0-r*gu04+t(h-r*gu4)%*%th
  bbl=c(bl1,bl2,bl3,bl4)
  bu1=h0-r*gl01+t(h-r*gl1)%*%th
  bu2=h0-r*gl02+t(h-r*gl2)%*%th
  bu3=h0-r*gl03+t(h-r*gl3)%*%th
  bu4=h0-r*gl04+t(h-r*gl4)%*%th
  bbu=c(bu1,bu2,bu3,bu4)
  wu=min(gu01+t(gu1)%*%th,gu02+t(gu2)%*%th,gu03+t(gu3)%*%th,gu04+t(gu4)%*%th)
  wl=max(gl01+t(gl1)%*%th,gl02+t(gl2)%*%th,gl03+t(gl3)%*%th,gl04+t(gl4)%*%th)
  
  #these are the conservative ci at ex=1
  cil=max(bbl)-ex*sl[which.max(bbl)]
  cir=min(bbu)+ex*su[which.min(bbu)]
  
  #######################################################################################
    
  hbdu0=min(bbu) #hat district level upperbound proposed (wtd)
  hbdl0=max(bbl) #hat district level lowererbound proposed (wtd)
   
  bdu=sum(n*x*bu)/sum(n*x) # district level upperbound by duncan-davis (wtd)
  bdl=sum(n*x*bl)/sum(n*x) # district level upperbound by duncan-davis (wtd)
  greg=lm(formula = t ~ poly(x, 1,raw=TRUE))#do goodman linear regression here
  a=coef(greg)[1]
  b=coef(greg)[2]
  gdmn=a+b #goodman result
  wk1=sum(n*x*(t+(1-x)*(c1+d1*x)))/sum(n*x) #wakeman w1=0
  wk2=sum(n*x*(t+(1-x)*(c1+d1*x+d1/2)))/sum(n*x) #wakeman w1=-d1/2
  #bdu-bdl  #width of interval district param, by duncan-davis
  
  cat(sprintf("True B: %f\n", trueBetaB))
  cat(sprintf("Duncan-Davis bounds: [%f, %f]\n", bdl, bdu))
  cat(sprintf("[l,u]=[min(X_i),max(X_i)]: [%f, %f]\n", l, u))
  cat(sprintf("CI_0=[Bl_hat, Bu_hat]: [%f, %f]\n", hbdl0, hbdu0))
  cat(sprintf("CI_1: [%f, %f]\n", cil, cir))
  #this width ratio measures efficiency of proposed interval:
  cat(sprintf("Width-ratio: |CI_0|/|DD|: %f\n", (hbdu0-hbdl0)/(bdu-bdl)))

}

main <- function() {
  cat("Example 3\n")
  generatedData <- generateDataExample3()
  calculateAndEvaluateBounds(generatedData$x, generatedData$t, generatedData$n, generatedData$trueBetaB)
  cat("\nExample 4\n")
  generatedData <- generateDataExample4()
  calculateAndEvaluateBounds(generatedData$x, generatedData$t, generatedData$n, generatedData$trueBetaB)
  cat("\nExample 5\n")
  generatedData <- generateDataExample5()
  calculateAndEvaluateBounds(generatedData$x, generatedData$t, generatedData$n, generatedData$trueBetaB)
}

main()
